#coding:utf-8
# Botu book list page downloader
from bs4 import BeautifulSoup
import requests
import os
import pymysql as mysql
import time
import math
import configparser

# Load the download root directory from config.ini (section [fileroot], key "root").
config = configparser.ConfigParser()
with open("config.ini", "r") as cfgfile:
	# read_file() replaces readfp(), which was deprecated since Python 3.2
	# and removed in Python 3.12.
	config.read_file(cfgfile)

	ROOT = config.get("fileroot", "root")

# Outbound HTTP proxy; only plain-http requests go through it (no "https" key).
proxies = {
  "http": "192.168.0.71:8012",
}
# Static request headers. NOTE(review): the session cookie is hard-coded, so
# requests presumably break once the server-side session expires — confirm.
hdrs = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.108 Safari/537.36',
		'Cookie':'ASP.NET_SessionId=5wck1vuidcqjp5at5habah45',
		'Host':'222.198.130.68',
		'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',}
def read_sql():
	"""Return the rawid of every article row still pending download (stat = 0).

	Returns:
		A tuple of 1-tuples, one ``(rawid,)`` per pending article.
	"""
	conn = mysql.connect( host = '192.168.30.182', port = 3306, user = 'root', passwd = 'vipdatacenter', db = 'botu',charset='utf8mb4',)
	try:
		cur = conn.cursor()
		cur.execute('''select rawid from article where stat = 0 ''' )
		sql_data = cur.fetchall()
		conn.commit()
		return sql_data
	finally:
		# Close even when the query raises; the original leaked the
		# connection on any exception before conn.close().
		conn.close()
def write_sql(sql, params=None):
	"""Execute a single write statement against the botu DB and commit.

	Args:
		sql: SQL statement; may contain ``%s`` placeholders.
		params: Optional parameter sequence forwarded to ``cursor.execute``
			so the driver escapes values (safer than formatting them into
			``sql`` by hand). Defaults to None for backward compatibility
			with existing callers that pass a fully-formatted string.
	"""
	conn = mysql.connect( host = '192.168.30.182', port = 3306, user = 'root', passwd = 'vipdatacenter', db = 'botu',charset='utf8mb4',)
	try:
		cur = conn.cursor()
		cur.execute(sql, params)
		conn.commit()
	finally:
		# Close even when execute/commit raises; the original leaked the
		# connection on any exception before conn.close().
		conn.close()
	
def down(url,bookid):
	"""Download one book page to <ROOT>/<yyyymmdd>/html/<bucket>/<bookid>.html.

	Skips the download when the target file already exists, and only writes
	the file (and marks the row stat = 1) when the page contains the
	expected <b id="bookname"> element.

	Args:
		url: Full BookRead.aspx URL for this book.
		bookid: Article rawid; used for the bucket directory and file name.
	"""
	print(url)
	# Bucket 1000 books per directory so no single directory grows huge.
	pathroot = os.path.join(ROOT,  time.strftime('%Y%m%d',time.localtime(time.time())),"html",str(math.ceil(int(bookid)/1000)))
	if not os.path.exists(pathroot):
		os.makedirs(pathroot)
	# Fixed local-variable typo: was 'file_paht'.
	file_path = os.path.join(pathroot,str(bookid)+'.html')
	if os.path.exists(file_path):
		print("第"+str(bookid)+"条存在")
		return
	try:
		r = requests.get(url, proxies=proxies,headers = hdrs,timeout = 10)
	except requests.RequestException:
		# Narrowed from a bare except so KeyboardInterrupt/SystemExit and
		# programming errors still propagate instead of printing "timeout".
		print("timeout")
		return

	# The site serves its error page with HTTP 200, so detect it by body text;
	# back off for 30 minutes before the caller retries other ids.
	if r.text.find("404 - 找不到文件或目录。")>-1:
		print(url)
		print("网络错误")
		time.sleep(1800)
		return
	soup = BeautifulSoup(r.content, 'lxml')
	div = soup.find('b',id = "bookname")
	if not div:
		print("html error")
		return
	with open(file_path,'wb') as f:
		f.write(r.content)
	print(str(bookid)+'下载成功')
	# NOTE(review): rawid comes from our own DB, but building SQL by string
	# formatting is still injection-prone; prefer write_sql with a
	# parameterized statement when the call sites can be migrated together.
	sql = "update article set stat = 1 where rawid = '%s' " % (bookid)
	write_sql(sql)
if __name__=="__main__":

	# Fetch every pending rawid, report how many there are, then download each.
	pending = read_sql()
	print(len(pending))
	url_prefix = "http://222.198.130.68/BookRead.aspx?bookid="
	for row in pending:
		book_id = row[0]
		down(url_prefix + str(book_id), book_id)